library (tidyverse)
library (dplyr)
library (sf)
library (rnaturalearth)
library (viridis)
library (gganimate)
library (gifski)
library (ggridges)
Source: World Development Indicators: https://databank.worldbank.org/source/world-development-indicators Series related to economy, education, environment, health, labor, poverty and public sector
# Reading files
# wdi <- world development indicators
# ind <- indicators selected
# df_cont <- main indicators calculated by continent
# world <- world basic indicators
# World Bank export; ".." is read as a missing value.
wdi <- read.csv(
  file = "./data/wdi.csv",
  header = TRUE,
  sep = ",",
  na.strings = ".."
)
# Dictionary of the selected indicators; semicolon-separated, empty cells are missing.
ind <- read.csv(
  file = "./data/indicators_selected.csv",
  header = TRUE,
  sep = ";",
  na.strings = ""
)
# Continent-level indicators; the file's own headers are replaced by short names.
df_cont <- read.csv(
  file = "./data/indicators_by_continent.csv",
  header = TRUE,
  sep = ",",
  na.strings = "..",
  col.names = c(
    "year", "year_code", "continent", "continent_code", "unemp",
    "emp_vuln", "out_sch_ado", "out_sch_child", "pov_ratio"
  )
)
# Extracting the selected indicators into the data from the world bank database, joining the 2 tables
# Transforming the columns with the years in rows (under the column year)
# Changing the format of the column year from X1971..YR1971. to 1971
# The first header of the export can carry a byte-order mark that read.csv()
# turns into a locale-dependent prefix (e.g. "ï..Series.Name"), so the join key
# is located by its stable suffix instead of being hard-coded.
series_col <- grep("Series\\.Name$", names(wdi), value = TRUE)
stopifnot(length(series_col) == 1L)
df <- inner_join(wdi, ind, by = setNames("series_name", series_col))
# One row per series/country/year; the year headers look like "X1971..YR1971.".
df <- pivot_longer(
  df,
  cols = "X1971..YR1971.":"X2020..YR2020.",
  names_to = "year",
  values_to = "values"
)
# Characters 2 to 5 of the old header are the four-digit year.
df <- mutate(df, year = as.integer(str_sub(year, start = 2, end = 5)))
# Renaming some columns and keeping only the data which interest us
# Transforming each indicator in columns (under the column year)
#colnames (df)
# Keep the five columns of interest (renaming the two country columns on the
# way) and spread the indicators into one column each.
df <- df %>%
  select(
    indicator,
    country_name = Country.Name,
    country_code = Country.Code,
    year,
    values
  ) %>%
  pivot_wider(names_from = "indicator", values_from = "values")
# Including some columns to analyze inequality
# Including a column with the percentage difference between the income share held by highest 10% and income share held by lowest 10%
# Including a column with the percentage difference between the income share held by highest 20% and income share held by lowest 20%
# Relative gap between the income share of the top and the bottom group
# (top share divided by bottom share, minus one), for deciles and quintiles.
df <- mutate(
  df,
  ineq_10 = inc_high10 / inc_low10 - 1,
  ineq_20 = inc_high20 / inc_low20 - 1
)
# Writing data in a table
# Persist the prepared table and load it straight back: every later step works
# on the object returned by read.table().
# NOTE(review): write.table() is called without `sep`, so despite the ".csv"
# name the file is presumably not comma-separated — confirm before sharing it.
write.table(df, file= "df.csv")
df <- read.table("df.csv")
# Obtaining the georeferenced database for the countries
# Country polygons, requested as an sf object (returnclass = "sf")
countries <- ne_countries(scale = "large", returnclass = "sf")
# Print the attribute columns that are available for selection
colnames (countries)
[1] "featurecla" "scalerank" "labelrank" "sovereignt" "sov_a3" "adm0_dif" "level" "type" "admin" "adm0_a3"
[11] "geou_dif" "geounit" "gu_a3" "su_dif" "subunit" "su_a3" "brk_diff" "name" "name_long" "brk_a3"
[21] "brk_name" "brk_group" "abbrev" "postal" "formal_en" "formal_fr" "name_ciawf" "note_adm0" "note_brk" "name_sort"
[31] "name_alt" "mapcolor7" "mapcolor8" "mapcolor9" "mapcolor13" "pop_est" "pop_rank" "gdp_md_est" "pop_year" "lastcensus"
[41] "gdp_year" "economy" "income_grp" "wikipedia" "fips_10_" "iso_a2" "iso_a3" "iso_a3_eh" "iso_n3" "un_a3"
[51] "wb_a2" "wb_a3" "woe_id" "woe_id_eh" "woe_note" "adm0_a3_is" "adm0_a3_us" "adm0_a3_un" "adm0_a3_wb" "continent"
[61] "region_un" "subregion" "region_wb" "name_len" "long_len" "abbrev_len" "tiny" "homepart" "min_zoom" "min_label"
[71] "max_label" "ne_id" "wikidataid" "name_ar" "name_bn" "name_de" "name_en" "name_es" "name_fr" "name_el"
[81] "name_hi" "name_hu" "name_id" "name_it" "name_ja" "name_ko" "name_nl" "name_pl" "name_pt" "name_ru"
[91] "name_sv" "name_tr" "name_vi" "name_zh" "geometry"
# Selecting only the columns of interest
countries <- select(
  countries,
  name, continent, adm0_a3, iso_a3, economy, income_grp, geometry
)
# Other columns: type, admin, region_un, subregion, region_wb, pop_est, pop_rank, gdp_md_est, pop_year, lastcensus, gdp_year
# Codes of the indicator table that find no partner in the map when joining on iso_a3
no_meet <- anti_join(df, countries, by = c("country_code" = "iso_a3"))
distinct(no_meet, country_code)
# `cod` is the key used for the join with the indicator table: iso_a3, except
# for the four territories whose code is set by hand from adm0_a3.
countries <- mutate(
  countries,
  cod = case_when(
    adm0_a3 == "FRA" ~ "FRA",
    adm0_a3 == "KOS" ~ "XKX",
    adm0_a3 == "NOR" ~ "NOR",
    adm0_a3 == "GGY" ~ "CHI",
    TRUE ~ iso_a3
  )
)
countries
Simple feature collection with 255 features and 7 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -180 ymin: -90 xmax: 180 ymax: 83.6341
CRS: +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
First 10 features:
name continent adm0_a3 iso_a3 economy income_grp geometry cod
1 Indonesia Asia IDN IDN 4. Emerging region: MIKT 4. Lower middle income MULTIPOLYGON (((117.7036 4.... IDN
2 Malaysia Asia MYS MYS 6. Developing region 3. Upper middle income MULTIPOLYGON (((117.7036 4.... MYS
3 Chile South America CHL CHL 5. Emerging region: G20 3. Upper middle income MULTIPOLYGON (((-69.51009 -... CHL
4 Bolivia South America BOL BOL 5. Emerging region: G20 4. Lower middle income MULTIPOLYGON (((-69.51009 -... BOL
5 Peru South America PER PER 5. Emerging region: G20 3. Upper middle income MULTIPOLYGON (((-69.51009 -... PER
6 Argentina South America ARG ARG 5. Emerging region: G20 3. Upper middle income MULTIPOLYGON (((-67.28475 -... ARG
7 Dhekelia Asia ESB <NA> 2. Developed region: nonG7 2. High income: nonOECD MULTIPOLYGON (((33.78094 34... <NA>
8 Cyprus Asia CYP CYP 6. Developing region 2. High income: nonOECD MULTIPOLYGON (((33.78183 34... CYP
9 India Asia IND IND 3. Emerging region: BRIC 4. Lower middle income MULTIPOLYGON (((77.80035 35... IND
10 China Asia CHN CHN 3. Emerging region: BRIC 3. Upper middle income MULTIPOLYGON (((78.91595 33... CHN
# joining the main data frame with the table containing the geographic information of the countries
# verifying the number of lines and if all rows matched appropriately, using
# anti_join() - return all rows from x where there are not matching values in y, keeping just columns from x
df_geo <- df %>%
  left_join(countries, by = c("country_code" = "cod"))
# Writing data in a table
# write.table(df_geo, file= "df_geo.csv")
# Main indicators, with missing values replaced by the most recent earlier value
df_fill <- df_geo %>%
  select(
    country_name, country_code, continent, year, pop, pov_ratio,
    out_sch_child, unemp, emp_vuln, ineq_10, ineq_20, inc_low10, inc_low20,
    inc_low20_40, inc_low40_60, inc_low60_80, inc_high10, inc_high20,
    pop_slum, lend_bor, acc_wat, acc_ele, acc_san, child_mort, geometry
  ) %>%
  # filter (year== 2017) as
  # count(is.na(pov_ratio)) #Verify the quantity of NA data
  # Many countries lack data for a single year, so the window 2010-2020 is
  # filled downwards per country and only the 2020 row (now carrying the
  # latest available value) is kept.
  filter(year >= 2010) %>%
  group_by(country_name) %>%
  fill(pop:child_mort) %>%
  ungroup() %>%
  filter(year == 2020)
# creating specific tables for each analysis
# One table per topic: df_fill restricted to the rows that have the indicator
poverty <- drop_na(df_fill, pov_ratio)
education <- drop_na(df_fill, out_sch_child)
inequality <- drop_na(df_fill, ineq_10)
unemployment <- drop_na(df_fill, unemp)
emp_vulnerable <- drop_na(df_fill, emp_vuln)
# Income shares of the lowest and highest 10%, in long format
# (one row per country and income group)
income10 <- df_fill %>%
  select(country_name, continent, year, ineq_10, inc_low10, inc_high10) %>%
  drop_na(inc_low10, inc_high10) %>%
  pivot_longer(
    cols = inc_low10:inc_high10,
    names_to = "indicator",
    values_to = "values"
  )
# Same layout for the five quintiles
income20 <- df_fill %>%
  select(
    country_name, continent, year, ineq_20,
    inc_low20, inc_low20_40, inc_low40_60, inc_low60_80, inc_high20
  ) %>%
  drop_na(inc_low20, inc_low20_40, inc_low40_60, inc_low60_80, inc_high20) %>%
  pivot_longer(
    cols = inc_low20:inc_high20,
    names_to = "indicator",
    values_to = "values"
  )
# Historic series for the animated graphic: values from 1980 onwards are
# filled downwards per country, then only 1990 and later is kept.
df_fill_hist <- df_geo %>%
  select(
    country_name, continent, year, pop, pov_ratio, out_sch_child,
    unemp, emp_vuln, acc_wat, acc_ele, acc_san, child_mort
  ) %>%
  filter(year >= 1980) %>%
  group_by(country_name) %>%
  fill(pop:child_mort) %>%
  ungroup() %>%
  filter(year >= 1990)
#filter (year== 1990 | year== 2000 | year== 2020)
# Historic series exactly as reported (no filling)
df_hist <- select(
  df_geo,
  country_name, continent, year, pov_ratio, out_sch_child, unemp, emp_vuln, pop
)
#drop_na (pov_ratio)
# Draws a map in which every feature is coloured by the quintile (ntile of 5)
# it occupies for one indicator.
#   df_fields   - indicator dictionary; the rows where `indicator == ind` supply
#                 the legend name (`dataset`) and the title (`series`, `unit`)
#   df          - data frame holding the indicator column and the geometry
#   indicator   - quosure naming the column, e.g. quo(pov_ratio)
#   ind         - the same indicator as a string, used to look up df_fields
#   orientation - 1 ranks the values ascending; any other value ranks them
#                 descending
# Returns the ggplot object.
map_indicator <- function(df_fields, df, indicator, ind, orientation) {
  subject <- subset(df_fields, indicator == ind, select = "dataset")
  series <- subset(df_fields, indicator == ind, select = "series")
  unit <- subset(df_fields, indicator == ind, select = "unit")
  title <- paste(series, unit)

  # Expression that gets cut into quintiles: the column itself, or its
  # descending rank
  ranked <- if (orientation == 1) indicator else quo(desc(!!indicator))

  # One legend entry per quintile: "<lowest value> to <highest value>"
  legend_labels <- df %>%
    drop_na(!!indicator) %>%
    group_by(group = factor(ntile(!!ranked, 5))) %>%
    summarise(upper = max(!!indicator), lower = min(!!indicator)) %>%
    mutate(label = str_c(sprintf("%.1f", lower), " to ", sprintf("%.1f", upper)))
  # print (head(legend_labels))

  ggplot() +
    geom_sf(data = st_sf(df), aes(fill = factor(ntile(!!ranked, 5)))) +
    scale_fill_viridis_d(
      name = paste(subject, "(5 ntile)"),
      labels = c(legend_labels$label)
    ) +
    labs(title = title, caption = "Latest data, between the years 2010 to 2020") +
    theme_void()
}
# Inputs for the analyses
map_indicator (ind, df_fill, quo(pov_ratio), "pov_ratio", -1)
`summarise()` ungrouping output (override with `.groups` argument)
# Same quintile map as the world version, restricted to one continent; the
# quintiles are computed among the rows of that continent only.
#   cont        - value of the `continent` column to keep, e.g. "Africa"
#   df_fields   - indicator dictionary; the rows where `indicator == ind` supply
#                 the legend name (`dataset`) and the title (`series`, `unit`)
#   df          - data frame holding the indicator column and the geometry
#   indicator   - quosure naming the column, e.g. quo(pov_ratio)
#   ind         - the same indicator as a string, used to look up df_fields
#   orientation - 1 ranks the values ascending; any other value ranks them
#                 descending
# Returns the ggplot object.
map_indicator_continent <- function(cont, df_fields, df, indicator, ind, orientation) {
  subject <- subset(df_fields, indicator == ind, select = "dataset")
  series <- subset(df_fields, indicator == ind, select = "series")
  unit <- subset(df_fields, indicator == ind, select = "unit")
  title <- paste(series, unit)

  # Expression that gets cut into quintiles: the column itself, or its
  # descending rank
  ranked <- if (orientation == 1) indicator else quo(desc(!!indicator))

  # One legend entry per quintile: "<lowest value> to <highest value>"
  legend_labels <- df %>%
    filter(continent == cont) %>%
    drop_na(!!indicator) %>%
    group_by(group = factor(ntile(!!ranked, 5))) %>%
    summarise(upper = max(!!indicator), lower = min(!!indicator)) %>%
    mutate(label = str_c(sprintf("%.1f", lower), " to ", sprintf("%.1f", upper)))
  # print (head(legend_labels))

  continent_map <- st_sf(df) %>%
    filter(continent == cont)

  ggplot() +
    geom_sf(data = continent_map, aes(fill = factor(ntile(!!ranked, 5)))) +
    scale_fill_brewer(
      name = paste(subject, "(5 ntile)"),
      labels = c(legend_labels$label),
      direction = -1
    ) +
    # geom_sf_text(data = continent_map, aes(label = country_code), size = 4) +
    labs(title = title, caption = "Latest data, between the years 2010 to 2020") +
    theme_void()
}
map_indicator_continent ("Africa", ind, df_fill, quo(pov_ratio), "pov_ratio", -1)
`summarise()` ungrouping output (override with `.groups` argument)
# Draws a map with a diverging colour scale (dark red - white - dark blue) for
# the value of one indicator.
#   df_fields   - indicator dictionary; the rows where `indicator == ind` supply
#                 the title (`series`, `unit`)
#   df          - data frame holding the indicator column and the geometry
#   indicator   - quosure naming the column, e.g. quo(lend_bor)
#   ind         - the same indicator as a string, used to look up df_fields
#   orientation - accepted for symmetry with the other map functions; it never
#                 influenced the plot (both former branches were identical)
# Returns the ggplot object.
map_indicator_diverging <- function(df_fields, df, indicator, ind, orientation) {
  series <- subset(df_fields, indicator == ind, select = "series")
  unit <- subset(df_fields, indicator == ind, select = "unit")
  title <- paste(series, unit)

  # The five colours are spread evenly over the scale limits, so white only
  # means "zero" when the limits are symmetric around zero.
  observed <- dplyr::pull(df, !!indicator)
  observed <- observed[is.finite(observed)]
  fill_limits <- NULL
  if (length(observed) > 0 && max(abs(observed)) > 0) {
    fill_limits <- c(-1, 1) * max(abs(observed))
  }

  ggplot() +
    geom_sf(data = st_sf(df), aes(fill = !!indicator)) +
    scale_fill_gradientn(
      colors = c("darkred", "red", "white", "blue", "darkblue"),
      breaks = c(-30, -10, 0, 10, 30),
      limits = fill_limits,
      na.value = "grey50"
    ) +
    labs(title = title, caption = "Latest data, between the years 2010 to 2020") +
    # geom_sf_text(data=data, aes(label = str_wrap(indicator, 1)), size = 4)+
    theme_void()
}
# Inputs for the analyses
map_indicator_diverging (ind, df_fill, quo(lend_bor), "lend_bor", -1)
# Draws a histogram (bins of width 10 between 0 and 100) of one indicator and
# writes the number of observations in each bar.
#   df_fields - indicator dictionary; the rows where `indicator == ind` supply
#               the texts (`dataset`, `series`, `unit`)
#   df        - data frame the plot is built on
#   indicator - the values to bin, passed as a vector (e.g. poverty$pov_ratio)
#   ind       - the same indicator as a string, used to look up df_fields
# Returns the ggplot object, so callers can add further layers with `+`.
histogram <- function(df_fields, df, indicator, ind) {
  subject <- subset(df_fields, indicator == ind, select = "dataset") # or ind%>% filter (indicator=="pov_ratio")%>% select ("dataset")
  series <- subset(df_fields, indicator == ind, select = "series")
  unit <- subset(df_fields, indicator == ind, select = "unit")
  # Count only the values that can actually land in a bin
  countries <- sum(!is.na(indicator))
  lab_title <- paste("# of countries according to the level of", subject)
  lab_x <- paste(series, unit)
  df %>%
    ggplot(aes(indicator)) +
    geom_histogram(breaks = seq(0, 100, by = 10), color = "darkblue", fill = "steelblue") +
    scale_x_continuous(breaks = seq(0, 100, by = 10)) +
    stat_bin(
      binwidth = 10, geom = "text", color = "white", center = 5,
      aes(label = (..count..)), position = position_stack(0.5)
    ) +
    labs(title = lab_title, x = lab_x, y = "# of countries", caption = "Latest data between the years 2010 to 2020") +
    # annotate() draws the note once; a geom_text() layer with a constant label
    # would repeat it for every row of `df`, stacking the copies on top of each
    # other.
    annotate("text", x = 80, y = 30, label = paste("# of countries analyzed=", countries))
}
histogram (ind, poverty, poverty$pov_ratio, "pov_ratio")+
geom_text(x = 40, y = 50, label = "10 countries with more than 50% of the population
living in conditions of extreme poverty", hjust = 0, vjust = 0.5, colour = "red", size = 4)+
geom_segment(aes(x = 55, y = 40, xend = 55, yend = 15), colour="red", size=0.5, arrow = arrow(length = unit(0.5, "cm")))+
geom_text(x = 20, y = 70, label = "35 countries with more than 20% of the population
living in conditions of extreme poverty", hjust = 0, vjust = 0.5, colour = "red", size = 4)+
geom_segment(aes(x = 25, y = 60, xend = 25, yend = 15), colour="red", size=0.5, arrow = arrow(length = unit(0.5, "cm")))
# Draws a horizontal bar chart of the countries with the highest values of one
# indicator, coloured by continent.
#   df_fields - indicator dictionary; the rows where `indicator == ind` supply
#               the texts (`dataset`, `series`, `unit`)
#   df        - data frame with `country_name`, `continent` and the indicator
#   indicator - quosure naming the column, e.g. quo(pov_ratio)
#   ind       - the same indicator as a string, used to look up df_fields
#   n         - how many countries to show (defaults to the former fixed 35)
# Returns the ggplot object.
top_countries_max <- function(df_fields, df, indicator, ind, n = 35) {
  stopifnot(is.numeric(n), length(n) == 1, n >= 1)
  # Text for labs (x, y and title)
  subject <- subset(df_fields, indicator == ind, select = "dataset")
  series <- subset(df_fields, indicator == ind, select = "series")
  unit <- subset(df_fields, indicator == ind, select = "unit")
  lab_title <- paste("Top", n, "countries with worst levels of", subject)
  lab_x <- paste(series, unit)
  # Plot
  df %>%
    slice_max(!!indicator, n = n) %>%
    ggplot(aes(x = !!indicator, y = reorder(country_name, !!indicator, max), fill = continent)) +
    geom_text(aes(label = round(!!indicator, 2), hjust = -0.3), size = 5) +
    scale_fill_manual(
      values = c("indianred1", "yellow", "deeppink1", "cyan", "orange", "gray", "seagreen3"),
      breaks = c("Africa", "Asia", "Europe", "North America", "Oceania", "Seven Seas", "South America")
    ) +
    geom_col() +
    labs(title = lab_title, x = lab_x, y = "Countries", caption = "Latest data between the years 2010 to 2020") +
    theme(text = element_text(size = 20), legend.position = "bottom")
}
top_countries_max (ind, poverty, quo(pov_ratio), "pov_ratio")
# Draws a horizontal bar chart of the countries with the lowest values of one
# indicator, coloured by continent.
#   df_fields - indicator dictionary; the rows where `indicator == ind` supply
#               the texts (`dataset`, `series`, `unit`)
#   df        - data frame with `country_name`, `continent` and the indicator
#   indicator - quosure naming the column, e.g. quo(lend_bor)
#   ind       - the same indicator as a string, used to look up df_fields
#   n         - how many countries to show (defaults to the former fixed 35)
# Returns the ggplot object.
top_countries_min <- function(df_fields, df, indicator, ind, n = 35) {
  stopifnot(is.numeric(n), length(n) == 1, n >= 1)
  # Text for labs (x, y and title)
  subject <- subset(df_fields, indicator == ind, select = "dataset")
  series <- subset(df_fields, indicator == ind, select = "series")
  unit <- subset(df_fields, indicator == ind, select = "unit")
  lab_title <- paste("Top", n, "countries with worst levels of", subject)
  lab_x <- paste(series, unit)
  # Plot
  df %>%
    slice_min(!!indicator, n = n) %>%
    ggplot(aes(x = !!indicator, y = reorder(country_name, !!indicator, min), fill = continent)) +
    geom_text(aes(label = round(!!indicator, 2), hjust = -0.3), size = 5) +
    scale_fill_manual(
      values = c("indianred1", "yellow", "deeppink1", "cyan", "orange", "gray", "seagreen3"),
      breaks = c("Africa", "Asia", "Europe", "North America", "Oceania", "Seven Seas", "South America")
    ) +
    geom_col() +
    labs(title = lab_title, x = lab_x, y = "Countries", caption = "Latest data between the years 2010 to 2020") +
    theme(text = element_text(size = 20), legend.position = "bottom")
}
top_countries_min (ind, poverty, quo(lend_bor), "lend_bor")
# Draws one boxplot per continent for an indicator, with the individual
# observations overlaid as points.
#   df_fields - indicator dictionary; the rows where `indicator == ind` supply
#               the texts (`series`, `unit`)
#   df        - data frame with `continent`, `country_name` and the indicator
#   indicator - quosure naming the column, e.g. quo(pov_ratio)
#   ind       - the same indicator as a string, used to look up df_fields
# Returns the ggplot object.
box_plot <- function(df_fields, df, indicator, ind) {
  # Text for labs (y and title)
  series <- subset(df_fields, indicator == ind, select = "series")
  unit <- subset(df_fields, indicator == ind, select = "unit")
  lab_title <- paste("Boxplot - ", series, "by continent", unit)
  lab_y <- paste(series, unit)
  df %>%
    drop_na(!!indicator) %>%
    # `size = pop` used to be passed to ggplot() outside aes(), where it had no
    # effect on the plot; it is dropped rather than silently ignored.
    ggplot(aes(x = continent, y = !!indicator, fill = continent, label = country_name)) +
    geom_boxplot(alpha = 0.6) +
    #geom_violin()+
    geom_point(alpha = 0.6) +
    labs(title = lab_title, y = lab_y) +
    #geom_text(hjust = -.1, nudge_x=0.02, alpha = .4)+
    theme(text = element_text(size = 20))
}
box_plot (ind, df_fill, quo(pov_ratio), "pov_ratio")
# Draws, per continent, one row of points per country showing the value of an
# indicator in every year (colour and size both encode the value).
#   df_fields   - indicator dictionary; the rows where `indicator == ind` supply
#                 the title (`series`, `unit`)
#   df          - data frame with `year`, `country_name`, `continent` and the
#                 indicator
#   indicator   - quosure naming the column, e.g. quo(pov_ratio)
#   ind         - the same indicator as a string, used to look up df_fields
#   orientation - direction of the viridis colour scale (1 or -1)
# Returns the ggplot object.
plot_hist_countries <- function(df_fields, df, indicator, ind, orientation) {
  # Text for labs (x, y and title)
  series <- subset(df_fields, indicator == ind, select = "series")
  unit <- subset(df_fields, indicator == ind, select = "unit")
  lab_title <- paste(series, "per country along the years", unit)
  df %>%
    # Rows without a value cannot be drawn, and a single NA makes max() return
    # NA for the whole country, which destroys the ordering of the y axis.
    drop_na(!!indicator) %>%
    ggplot(aes(
      x = year,
      y = reorder(country_name, !!indicator, max),
      color = !!indicator,
      size = !!indicator
    )) +
    scale_color_viridis_c(direction = orientation) +
    geom_point() +
    scale_x_continuous(breaks = seq(1970, 2020, by = 10)) +
    theme(text = element_text(size = 25)) +
    labs(title = lab_title, x = "Years", y = "countries") +
    facet_wrap("continent", scales = "free")
}
# `poverty_hist` is not created anywhere in this script; the unfilled historic
# table `df_hist` is the data set used for this plot in the report section.
plot_hist_countries(ind, df_hist, quo(pov_ratio), "pov_ratio", -1)
# Draws a heat map (year on x, continent on y) in which every tile is coloured
# by the quintile (ntile of 5) of the indicator value.
#   df_fields   - indicator dictionary; the rows where `indicator == ind` supply
#                 the legend name (`dataset`) and the title (`series`, `unit`)
#   df          - data frame with `year`, `continent` and the indicator
#   indicator   - quosure naming the column, e.g. quo(emp_vuln)
#   ind         - the same indicator as a string, used to look up df_fields
#   orientation - direction of the viridis palette (1 or -1); it only flips the
#                 colours, never the meaning of a legend entry
# Returns the ggplot object.
hot_map <- function(df_fields, df, indicator, ind, orientation) {
  # Text for labs (title)
  subject <- subset(df_fields, indicator == ind, select = "dataset")
  series <- subset(df_fields, indicator == ind, select = "series")
  unit <- subset(df_fields, indicator == ind, select = "unit")
  lab_title <- paste("Hot map - ", series, unit)

  # The quintile is computed once and shared by tiles and legend. Previously
  # the legend of the non -1 orientation was derived from a descending ntile
  # while the tiles always used the ascending one, so labels and colours
  # disagreed.
  tiles <- df %>%
    drop_na(!!indicator) %>%
    mutate(.quintile = ntile(!!indicator, 5))

  # Limits for the legend: "<lowest value> to <highest value>" per quintile
  legend_key <- tiles %>%
    group_by(.quintile) %>%
    summarise(upper = max(!!indicator), lower = min(!!indicator)) %>%
    mutate(label = str_c(sprintf("%.1f", lower), " to ", sprintf("%.1f", upper)))
  # print(legend_key)

  # A numeric year needs a continuous scale for the 5-year breaks to be drawn
  year_breaks <- seq(1950, 2020, by = 5)
  x_scale <- if (is.numeric(tiles[["year"]])) {
    scale_x_continuous(breaks = year_breaks)
  } else {
    scale_x_discrete(breaks = year_breaks)
  }

  ggplot(tiles, aes(x = year, y = continent, fill = .quintile)) +
    geom_tile() +
    scale_fill_viridis(
      name = paste(subject, "(5 ntile)"),
      direction = orientation,
      # explicit breaks keep labels and quintiles aligned even when fewer than
      # five quintiles exist
      breaks = legend_key$.quintile,
      labels = legend_key$label
    ) +
    x_scale +
    labs(title = lab_title, y = "Continent", x = "Year", caption = "Latest data between the years 2010 to 2020")
  #theme(text = element_text(size = 15))
}
hot_map (ind, df_cont, quo(emp_vuln), "emp_vuln", -1)
`summarise()` ungrouping output (override with `.groups` argument)
# Scatter plot of one indicator against another, one labelled point per
# country; colour encodes the continent and size the population quintile.
#   df_fields  - indicator dictionary; the rows where `indicator` equals
#                ind1 / ind2 supply the axis texts (`series`, `unit`)
#   df         - data frame with `continent`, `country_name`, `pop` and both
#                indicators
#   indicator1 - quosure for the x column, e.g. quo(ineq_10)
#   indicator2 - quosure for the y column, e.g. quo(pov_ratio)
#   ind1, ind2 - the same indicators as strings, used to look up df_fields
# Returns the ggplot object.
correlation <- function(df_fields, df, indicator1, indicator2, ind1, ind2) {
  # Axis text of one indicator: its series name followed by its unit
  describe <- function(key) {
    paste(
      subset(df_fields, indicator == key, select = "series"),
      subset(df_fields, indicator == key, select = "unit")
    )
  }
  lab_x <- describe(ind1)
  lab_y <- describe(ind2)
  lab_title <- paste(lab_x, "x", lab_y)

  # Only rows where both indicators are present can be placed on the plot
  complete_rows <- drop_na(df, !!indicator1, !!indicator2)

  ggplot(
    complete_rows,
    aes(
      x = !!indicator1,
      y = !!indicator2,
      color = continent,
      label = country_name,
      size = ntile(pop, 5)
    )
  ) +
    geom_point(alpha = 0.6) +
    scale_colour_manual(
      values = c("indianred1", "yellow", "deeppink1", "cyan", "orange", "gray", "seagreen3")
    ) +
    labs(title = lab_title, x = lab_x, y = lab_y, caption = "Latest data between the years 2010 to 2020") +
    geom_text(hjust = -.1, nudge_x = 0.02, alpha = .3)
}
correlation (ind, df_fill, quo(ineq_10), quo(pov_ratio), "ineq_10", "pov_ratio")
Other graphics, more specific and complex, were included at the end of the report.
POVERTY
Where do the world’s poorest people live today?
map_indicator (ind, df_fill, quo(pov_ratio), "pov_ratio", -1)
Approximately 1/5 of the countries in the world have more than 26% of the population living with up to 1.90 a day. These countries are concentrated in Africa.
map_indicator_continent ("Africa", ind, df_fill, quo(pov_ratio), "pov_ratio", -1)
Analyzing Africa in more detail, it is possible to see the largest region of poverty, where 51 to 77% of population is living with up to 1.90 a day.
Under what conditions do people live today?
map_indicator (ind, df_fill, quo (pop_slum),"pop_slum", -1)
map_indicator (ind, df_fill, quo (acc_ele),"acc_ele", 1)
map_indicator_continent ("Africa", ind, df_fill, quo(acc_ele), "acc_ele", 1) # Zoom in Africa, but there are other continents with low values
map_indicator (ind, df_fill, quo (acc_wat),"acc_wat", 1)
map_indicator (ind, df_fill, quo (acc_san),"acc_san", -1)
map_indicator_continent ("Africa", ind, df_fill, quo(acc_san), "acc_san", -1) # Zoom in Africa, but there are other continents with low values
map_indicator (ind, df_fill, quo (child_mort),"child_mort", -1)
There are still many countries with a high percentage of the population living in subhuman conditions:
Where do the world’s poorest people live today?
histogram (ind, poverty, poverty$pov_ratio, "pov_ratio")+
geom_text(x = 40, y = 50, label = "10 countries with more than 50% of the population
living in conditions of extreme poverty", hjust = 0, vjust = 0.5, colour = "red", size = 4)+
geom_segment(aes(x = 55, y = 40, xend = 55, yend = 15), colour="red", size=0.5, arrow = arrow(length = unit(0.5, "cm")))+
geom_text(x = 20, y = 70, label = "35 countries with more than 20% of the population
living in conditions of extreme poverty", hjust = 0, vjust = 0.5, colour = "red", size = 4)+
geom_segment(aes(x = 25, y = 60, xend = 25, yend = 15), colour="red", size=0.5, arrow = arrow(length = unit(0.5, "cm")))
top_countries_max (ind, poverty, quo(pov_ratio),"pov_ratio")
The top 30 countries with extreme poverty are all in Africa.
How is the behaviour of extreme poverty by continent?
box_plot (ind, df_fill, quo(pov_ratio), "pov_ratio")
# box_plot (ind, df_fill, quo(acc_ele), "acc_ele")
In addition to Africa, Asia, North America and Oceania have countries with more than 20% of the population living in poverty.
How has poverty evolved per country over the years?
plot_hist_countries (ind, df_hist, quo(pov_ratio), "pov_ratio", -1)
Extreme poverty is decreasing in the majority of countries in the world.
Asia: considerable reduction in extreme poverty, e.g., in Indonesia, China, Nepal and India. The only exception is Uzbekistan, but there is no recent data for this country. Americas: poverty also fell, with the exception of Venezuela (which has no data for recent years). Oceania and Europe: values have decreased or remained relatively low over the years. Africa: extreme poverty is more widespread and, although it is decreasing in many countries, it is increasing in others (Madagascar, Malawi, Guinea Bissau, Angola, Sao Tome and Principe, Zambia, Zimbabwe and Cote d’Ivoire).
How has poverty evolved by continent over the years?
hot_map (ind, df_cont, quo(pov_ratio), "pov_ratio", -1)
`summarise()` ungrouping output (override with `.groups` argument)
The graph above shows the improvement in performance on this indicator in all continents. Only in Middle East & North Africa this indicator worsened in recent years.
EDUCATION
"In which countries are more children out of school?
map_indicator (ind, df_fill, quo(out_sch_child),"out_sch_child", -1)
Approximately 1/5 of the countries have more than 12% of the children out of school.
histogram (ind, education, education$out_sch_child, "out_sch_child")+
geom_text(x = 40, y = 50, label = "2 countries with more than 50% of the children
out of school (% of primary school age)", hjust = 0, vjust = 0.5, colour = "red", size = 4)+
geom_segment(aes(x = 55, y = 40, xend = 55, yend = 15), colour="red", size=0.5, arrow = arrow(length = unit(0.5, "cm")))+
geom_text(x = 20, y = 70, label = "21 countries with more than 20% of the children out of school
(% of primary school age)", hjust = 0, vjust = 0.5, colour = "red", size = 4)+
geom_segment(aes(x = 25, y = 60, xend = 25, yend = 15), colour="red", size=0.5, arrow = arrow(length = unit(0.5, "cm")))
top_countries_max (ind, education, quo(out_sch_child),"out_sch_child")
South Sudan is the worst country in this indicator, with 62.3% of children out of school (% of primary school age). In North America, Turks and Caicos Islands is the worst country in primary education, with 27.84% of children out of school. Syrian Arab Republic in Asia, with 27.61% of children out of school. Marshall Islands in Oceania, with 25.86% of children out of school. Bulgaria appears in the 35th position in the ranking of countries with the most children out of school.
How are the education indicator statistics by continent (boxplot)?
box_plot (ind, df_fill, quo(out_sch_child),"out_sch_child")
How has the education indicator evolved per country over the years?
plot_hist_countries (ind, df_hist, quo(out_sch_child), "out_sch_child", -1)
The vast majority of countries in the world are improving their performance in this indicator. However, there are still some countries that are getting worse, such as South Sudan, Equatorial Guinea, Congo (Dem. Rep.), Turks and Caicos Islands, Suriname and Bolivia. Paraguay and Venezuela first improved and then got worse again.
How has the education indicator evolved by continent over the years?
hot_map (ind, df_cont, quo(out_sch_child), "out_sch_child", -1)
`summarise()` ungrouping output (override with `.groups` argument)
The graph above shows the improvement in performance on this indicator in all continents.
EMPLOYMENT
In which countries are more unemployment and vulnerable employment?
map_indicator (ind, df_fill, quo(unemp),"unemp", -1)
map_indicator (ind, df_fill, quo (emp_vuln),"emp_vuln", -1)
Approximately 1/5 of the countries have more than 11% of unemployment (persons unemployed/ total labor force). Approximately 2/5 of the countries have more than 40% of vulnerable employment (% of total employment).
histogram (ind, unemployment, unemployment$unemp, "unemp")+
geom_text(x = 20, y = 70, label = "7 countries with more than 20% of the labour force unemployed)", hjust = 0, vjust = 0.5,
colour = "red", size = 4)+
geom_segment(aes(x = 25, y = 60, xend = 25, yend = 15), colour="red", size=0.5, arrow = arrow(length = unit(0.5, "cm")))
histogram (ind, emp_vulnerable, emp_vulnerable$emp_vuln, "emp_vuln")+
geom_text(x = 40, y = 40, label = "59 countries with more than 50% of
vulnerable employment)", hjust = 0, vjust = 0.5, colour = "red", size = 4)+
geom_segment(aes(x = 55, y = 35, xend = 55, yend = 15), colour="red", size=0.5, arrow = arrow(length = unit(0.5, "cm")))+
geom_text(x = 27, y = 50, label = "116 countries with more than 20% of
vulnerable employment)", hjust = 0, vjust = 0.5, colour = "red", size = 4)+
geom_segment(aes(x = 25, y = 53, xend = 25, yend = 25), colour="red", size=0.5, arrow = arrow(length = unit(0.5, "cm")))
top_countries_max (ind, unemployment, quo(unemp),"unemp")
top_countries_max (ind, emp_vulnerable, quo (emp_vuln),"emp_vuln")
Unemployment:
There are countries from all continents in this ranking. South Africa appears first in the ranking with 28% unemployment.
Vulnerable employment:
These 35 countries have more than 71% of people working in vulnerable employment. Burundi, Niger, Chad, Central African and Guinea are countries with more percentage of vulnerable employment in Africa. In Asia, Korea, Afghanistan, Lao PDR, Congo and India have the worst performance in this indicator. Also, Papua New Guinea in Oceania and Haiti in North America are in the ranking.
How are the unemployment and vulnerable employment indicators statistics by continent (boxplot)?
box_plot (ind, df_fill, quo(unemp),"unemp")
box_plot (ind, df_fill, quo (emp_vuln),"emp_vuln")
How have unemployment and vulnerable employment evolved per country over the years?
plot_hist_countries (ind, df_hist, quo(unemp), "unemp", -1)
plot_hist_countries (ind, df_hist, quo(emp_vuln), "emp_vuln", -1)
There is no oscillation pattern of unemployment and vulnerable work among the countries.
Generally, the unemployment level fluctuates over the years in each country. Bosnia and North Macedonia have the worst unemployment rates in Europe, West Bank and Gaza and Armenia in Asia, South Africa and Lesotho in Africa, Sta Lucia and St Vincent and the Grenadines in North America.
Regarding vulnerable employment, in some countries this indicator has not changed over the years, although in the graphic below we can see a decrease in this kind of employment.
How have unemployment and vulnerable employment evolved per continent over the years?
hot_map (ind, df_cont, quo(unemp), "unemp", -1)
hot_map (ind, df_cont, quo(emp_vuln), "emp_vuln", -1)
Since the 1990s, the unemployment rate has been high in the Middle East & North Africa and low in East Asia & Pacific. In the graphic above, it is possible to identify the crisis of 2008 and 2009 in North America.
Vulnerable employment has, in general, decreased in the world and in the continents. The exception is Latin America & Caribbean, where it has increased in recent years.
Which of the countries have the highest net borrowing (as a % of GDP)?
map_indicator_diverging (ind, df_fill, quo (lend_bor),"lend_bor", -1)
top_countries_min (ind, education, quo(lend_bor),"lend_bor")
The higher the unemployment the higher the poverty?
correlation (ind, df_fill, quo(unemp), quo(pov_ratio), "unemp", "pov_ratio")
The higher the vulnerable employment the higher the poverty?
correlation (ind, df_fill, quo(emp_vuln), quo(pov_ratio), "emp_vuln", "pov_ratio")
These variables have a good correlation.
The lower the education the higher the poverty?
correlation (ind, df_fill, quo(out_sch_child), quo(pov_ratio), "out_sch_child", "pov_ratio")
Is a more educated society more or less equal?
correlation (ind, df_fill, quo(ineq_10), quo(out_sch_child), "ineq_10", "out_sch_child")
Is a poorer society more or less equal?
correlation (ind, df_fill, quo(ineq_10), quo(pov_ratio), "ineq_10", "pov_ratio")
How much difference in income has the richest 10% of the population compared to the poorest? Where are these differences most discrepant?
# height=10/70
# Plotting the Income inequality graph
# Dumbbell chart of the income share held by the lowest and the highest 10%,
# for the rows with the largest ineq_10 (n = 40 rows of the long table).
# The former theme() call that set the "Calibri" font was placed before
# theme_classic(), which replaces the whole theme, so it never took effect and
# is removed; the remaining theme tweaks are merged into one call.
income10 %>%
  slice_max(ineq_10, n = 40) %>%
  ggplot(aes(x = values, y = reorder(country_name, ineq_10, max), color = indicator)) +
  geom_point() +
  scale_color_manual(
    values = c("deepskyblue4", "firebrick"),
    labels = c("Income by highest 10%", "Income by lowest 10%")
  ) +
  # grey connector between the two points of a country
  geom_line(aes(group = country_name), color = "grey50") +
  labs(title = "Income share held by lowest 10% and higher 10%", subtitle= "ordering by percentage of difference between the higher 10% and the lower 10%",x = "income share held by lowest 10% and higher 10%", y = "Countries", caption = "Latest data between the years 2010 to 2020") +
  theme_classic() +
  theme(
    panel.background = element_rect(fill = "gray95"),
    panel.border = element_rect(color = "gray50", fill = NA),
    text = element_text(size = 20)
  ) +
  coord_cartesian(xlim = c(0, 60)) +
  #theme( panel.grid.major.y = element_line(linetype = "dashed", color = "lightgray"))+
  geom_text(aes(label = values), vjust = -1) + #nudge_y=.3
  # ineq_10 printed in a box at the right edge of the panel
  geom_label(aes(x = 60, label = sprintf("%.1f%%", ineq_10)), color = "black")
#facet_wrap ('continent', scales = 'free_y')
South Africa is the country with the greatest percentage difference between the income share held by higher 10% and lowest 10%, followed by Namibia and Zambia.
The 10% of the population with the highest disposable income in South Africa received 55 times as much income as the 10% with the lowest disposable income.
# Plotting the Income inequality graph facet_wrap by continent
# Same dumbbell chart for every country, split into one panel per continent.
# The former theme() call that set the "Calibri" font was placed before
# theme_classic(), which replaces the whole theme, so it never took effect and
# is removed; the remaining theme tweaks are merged into one call.
income10 %>%
  ggplot(aes(x = values, y = reorder(country_name, ineq_10, max), color = indicator)) +
  geom_point() +
  scale_color_manual(
    values = c("deepskyblue4", "firebrick"),
    labels = c("Income by highest 10%", "Income by lowest 10%")
  ) +
  # grey connector between the two points of a country
  geom_line(aes(group = country_name), color = "grey50") +
  labs(title = "Income share held by lowest 10% and higher 10%", subtitle= "ordering by percentage of difference between the higher 10% and the lower 10%",x = "income share held by lowest 10% and higher 10%", y = "Countries", caption = "Latest data between the years 2010 to 2020") +
  theme_classic() +
  theme(
    panel.background = element_rect(fill = "gray95"),
    panel.border = element_rect(color = "gray50", fill = NA),
    text = element_text(size = 15)
  ) +
  coord_cartesian(xlim = c(0, 65)) +
  #theme( panel.grid.major.y = element_line(linetype = "dashed", color = "lightgray"))+
  geom_text(aes(label = values), vjust = -1) +
  # ineq_10 printed in a box near the right edge of the panel
  geom_label(aes(x = 60, label = sprintf("%.1f%%", ineq_10)), color = "black") +
  facet_wrap("continent", scales = "free_y")
NA
# Animated scatter plot (one frame per year) of children out of school against
# the poverty ratio, one panel per continent, built on the filled historic data.
df_fields <- ind
indicator1 <- quo(out_sch_child)
indicator2 <- quo(pov_ratio)
ind1 <- "out_sch_child"
ind2 <- "pov_ratio"
# Text for title
series1 <- subset(df_fields, indicator == ind1, select = "series")
unit1 <- subset(df_fields, indicator == ind1, select = "unit")
series2 <- subset(df_fields, indicator == ind2, select = "series")
unit2 <- subset(df_fields, indicator == ind2, select = "unit")
lab_x <- paste(series1, unit1)
lab_y <- paste(series2, unit2)
lab_title <- paste(lab_x, "x", lab_y, "along the years")
# Keep only rows where both indicators are present. The result of drop_na()
# used to be printed and thrown away, so the plot was built on unfiltered data.
df1 <- df_fill_hist %>%
  drop_na(!!indicator1, !!indicator2)
# filter (continent == "Asia")
p <- ggplot(df1, aes(x = !!indicator1, y = !!indicator2, color = country_name, size = pop)) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  scale_colour_discrete() +
  scale_size(range = c(2, 12)) +
  # {frame_time} is replaced by the year of the frame being rendered
  labs(title = "Year: {frame_time}", x = lab_x, y = lab_y, caption = "Latest data between the years 2010 to 2020") +
  facet_wrap("continent", scales = "free") +
  transition_time(year) + # animation
  ease_aes("linear")
animate(p, renderer = gifski_renderer())